function [] = subsetoverlap(varargin)
%SUBSETOVERLAP  Calculate the overlap of synaptic subsets after subset
%classification.
%
%Syntax:    [] = subsetoverlap('dir1','c:\...','dirsav','c:\...')
%Input:     'dir1'   = the root directory of the subclass
%           'dirsav' = the directory where you want to save the new
%                      subclass to.
%           note: if either one is not entered, you will be prompted to
%           select it.
%Output:    none.  For every pair of directories that share an overclass,
%           up to four sub-directories are created under dirsav
%           (<class>_<A>_vs_<B>_notunique / _unique and the mirrored
%           <B>_vs_<A> pair) and filled by process_overlap.
%
%The overlap is decided purely on the last column (the ordinals) of the
%"origin" vertices file of each directory: the Gephyrin file for the 'GAD'
%overclass and the PSD95 file for every other overclass.

[dir1,dirsav] = parse(varargin);

%sort out the directories of the subclass
[dirnames,class_names,dirlabel] = dir_sort(dir1);

%now calculate the overlap for each overclass
for i = 1:size(class_names,2)
    dirnames_tmp = dirnames{i};     %take out the set of directories for this over class
    dirlabel_tmp = dirlabel{i};     %grab the directory labels
    %origin vertices name
    switch class_names{i}
        case 'GAD'      %inhibitory origin use Gephyrin
            matchstr = 'Geph';
        otherwise       %excitatory origin use PSD95
            matchstr = 'PSD95';
    end
    for j = 1:size(dirnames_tmp,2)-1    %first comparison directory, never need to go to the end
        %first get the directory structure information
        path1 = [dir1,filesep,dirnames_tmp{j}];
        dir_struct1 = dir(path1);
        [filename] = pullfile(dir_struct1,matchstr,'avert');    %grab the origins filename
        %the first origin does not depend on k, so read it once per j
        [ori1,ord1] = subsetoverlap_read_origin([path1,filesep,filename]);
        for k = j+1:size(dirnames_tmp,2)    %second comparison directory
            %first get the directory structure information
            path2 = [dir1,filesep,dirnames_tmp{k}];
            dir_struct2 = dir(path2);
            [filename2] = pullfile(dir_struct2,matchstr,'avert');   %grab the second origins filename
            [ori2,ord2] = subsetoverlap_read_origin([path2,filesep,filename2]);
            %we could compare the vertices, but we only need the ordinals
            %first find the nonunique values
            [nuord,nuidx1,nuidx2] = intersect(ord1,ord2);
            %second find the unique values
            [uord1,uidx1] = setdiff(ord1,ord2);
            [uord2,uidx2] = setdiff(ord2,ord1);
            %now that we have the ordinals and the indexes, it's time to
            %process the subclass data
            if size(ori1,2)>=3   %no ori1, no dice
                %not unique 1 vs 2
                dirtmp = [class_names{i},'_',dirlabel_tmp{j},'_vs_',dirlabel_tmp{k},'_notunique'];
                mkdir(dirsav,dirtmp);   %make the directory
                process_overlap(path1,dir_struct1,nuidx1,nuord,[dirsav,filesep,dirtmp])
                %unique 1 vs 2
                dirtmp = [class_names{i},'_',dirlabel_tmp{j},'_vs_',dirlabel_tmp{k},'_unique'];
                mkdir(dirsav,dirtmp);   %make the directory
                process_overlap(path1,dir_struct1,uidx1,uord1,[dirsav,filesep,dirtmp])
            end
            if size(ori2,2)>=3  %no ori2, no go
                %not unique 2 vs 1
                dirtmp = [class_names{i},'_',dirlabel_tmp{k},'_vs_',dirlabel_tmp{j},'_notunique'];
                mkdir(dirsav,dirtmp);   %make the directory
                process_overlap(path2,dir_struct2,nuidx2,nuord,[dirsav,filesep,dirtmp])
                %unique 2 vs 1
                dirtmp = [class_names{i},'_',dirlabel_tmp{k},'_vs_',dirlabel_tmp{j},'_unique'];
                mkdir(dirsav,dirtmp);   %make the directory
                process_overlap(path2,dir_struct2,uidx2,uord2,[dirsav,filesep,dirtmp])
            end
        end
    end
end

%--------------------------------------------------------------------------
%subfunction to read one origins file and pull out its ordinals
function [ori,ord] = subsetoverlap_read_origin(filepath)
%Reads the comma separated file at filepath, skipping the header row.
%Output:    ori = the numeric table, or [] when the file cannot be read or
%                 holds no data rows
%           ord = the last column of ori, or an empty 0x1 column when ori
%                 is empty
%An unreadable file yields EMPTY ordinals rather than a placeholder value:
%a placeholder such as 0 would take part in intersect/setdiff as if it
%were a real ordinal and could match a genuine ordinal 0 in the other set.
try
    ori = dlmread(filepath,',',1,0);
catch   %missing, unreadable or empty file
    ori = [];
end
if isempty(ori)
    ori = [];
    ord = zeros(0,1);
else
    ord = ori(:,end);
end

%--------------------------------------------------------------------------
%subfunction to parse the inputs.
function [dir1,dirsav] = parse(input)
%Turns the name/value list handed to the main function into the two
%directories it needs.
%Input:     input = 1xN cell array laid out as {name1,value1,name2,value2,...}
%                   recognized names are 'dir1' and 'dirsav'
%Output:    dir1   = value given for 'dir1'
%           dirsav = value given for 'dirsav'
%           Whichever one is still empty after parsing is requested through
%           uigetdir2 / uigetdir3 (not defined in this file; presumably
%           project variants of uigetdir - confirm).
%Errors:    a name that is not a character array, or a recognized name
%           that is the last element and therefore has no value.
%           Unrecognized names only produce a warning.

dir1 = [];
dirsav = [];

%Parse the input
if ~isempty(input)
    for i = 1:2:size(input,2)
        name = input{1,i};
        if ischar(name)
            switch name
                case {'dir1','dirsav'}
                    %a trailing name without its value used to fail with
                    %an index-out-of-bounds error; say what is wrong
                    if i+1 > size(input,2)
                        error(['The option ''',name,''' was given without a value.  Please check help.']);
                    end
                    if strcmp(name,'dir1')
                        dir1 = input{1,i+1};
                    else
                        dirsav = input{1,i+1};
                    end
                otherwise
                    warning(['Your input ',name,' is not recognized.']);
            end
        else
            error(['The parameters you entered is incorrect.  Please check help.']);
        end
    end
end

%now check if dir1 and dirsav are entered, prompt for whatever is missing
if isempty(dir1)
    dir1 = uigetdir2('','Directory of subclass');
end
if isempty(dirsav)
    dirsav = uigetdir3('','Directory to save the data of subclass');
end
%-----------------------------------------------------------------------------------------------------------------------------
function [dirnames_out,class_names,dirlabel] = dir_sort(dir_tmp)
%Give the directory and it will sort out the sub-directories in it by the
%overclass they are associated with.
%Input:     dir_tmp = root directory to scan
%Output:    dirnames_out = cell array, one cell per overclass, each holding
%                          a cell array of the sub-directory names of that
%                          overclass
%           class_names  = cell array of overclass names, one per entry of
%                          dirnames_out.  Names that differ only by case
%                          are merged under the first spelling returned by
%                          unique.
%           dirlabel     = same layout as dirnames_out, holding each
%                          sub-directory name with the delimiter and the
%                          overclass name stripped off
%           All three are empty cell arrays when no usable sub-directory
%           is found.
%A sub-directory name is split at its LAST delimiter, where a delimiter is
%any punctuation other than '&', or any white space.  Sub-directories
%without a delimiter cannot be classified and are skipped with a warning.

dirnames_out = {};
class_names = {};
dirlabel = {};

dir_struct = dir(dir_tmp);  %grab the directory information
names = {dir_struct.name};   %grab the all of the names in the root
idx = [dir_struct.isdir];   %grab all of the isdir numbers
%drop the '.' and '..' entries by name; dir does not promise that they are
%the first two entries
is_dot = strcmp(names,'.') | strcmp(names,'..');
dirnames = names(idx & ~is_dot);

kept = {};          %sub-directories that could be classified
f_tmp = {};         %their overclass names
dirlabel_tmp = {};  %their labels
for i = 1:numel(dirnames)      %step through each name and pull the wanted word
    name = dirnames{i};
    %punctuation except '&', plus white space
    strmask = (isstrprop(name,'punct') & name~='&') | isstrprop(name,'wspace');
    y = find(strmask);  %get the positions
    if isempty(y)
        warning(['The directory ',name,' has no delimiter in its name and was skipped.']);
        continue
    end
    %In the current scheme of things the overclass names are actually at
    %the end of the directory name, e.g., Synpod-PSD-Syn_VGlut1.  So, we
    %just need to take everything from the last delimiter to the end.
    kept{end+1} = name;
    f_tmp{end+1} = name(1,y(end)+1:end);
    dirlabel_tmp{end+1} = name(1,1:y(end)-1);   %the directory name without the overclass name, for labeling purposes
end
if isempty(f_tmp)
    return
end

class_names = unique(f_tmp);    %how many unique words are there
%make sure there are no duplicates with different cases: unique is case
%sensitive while the grouping below is not, so without this step 'GAD' and
%'gad' would become two classes that each hold every directory of both
keep = true(size(class_names));
for a = 2:numel(class_names)
    if any(strcmpi(class_names{a},class_names(1:a-1)))
        keep(a) = false;
    end
end
class_names = class_names(keep);

for l = 1:size(class_names,2)      %step through class names
    hit = strcmpi(class_names{l},f_tmp);    %case insensitive match
    dirnames_out{l} = kept(hit);
    dirlabel{l} = dirlabel_tmp(hit);
end

%--------------------------------------------------------------------------
%subfunction to separate files from directories
function [aprop,avert,aflat,arotvert] = process_files(dir_struct)
%Picks the data files out of a directory listing and groups them by the
%word in front of the first punctuation character of their name.
%Input:     dir_struct = structure array with fields name and isdir, as
%                        returned by dir
%Output:    aprop    = 1xN cell array of file names starting with 'aprop'
%           avert    = ... starting with 'avert'
%           aflat    = ... starting with 'aflat'
%           arotvert = ... starting with 'a3Dvert'
%           Each output is an empty 1x0 cell array when nothing matches.
%Directories are removed through their isdir flag, so the '.' and '..'
%entries need no special treatment and no entry is skipped by position.
%File names without any punctuation cannot carry one of the prefixes and
%are ignored.

names = {dir_struct.name};      %pull the file and dir names
isdir_flags = [dir_struct.isdir];   %pull the isdir info
list = reshape(names(~isdir_flags),1,[]);   %get the files, as a row

%word in front of the first punctuation of every file name
prefix = cell(size(list));
for j = 1:numel(list)
    first_punct = find(isstrprop(list{j},'punct'),1);
    if isempty(first_punct)
        prefix{j} = '';     %no delimiter at all, cannot be one of ours
    else
        prefix{j} = list{j}(1:first_punct-1);
    end
end

%create the desired lists of file names.
aprop = list(strcmp(prefix,'aprop'));       %list of properties files
avert = list(strcmp(prefix,'avert'));       %list of vertices files
aflat = list(strcmp(prefix,'aflat'));       %list of flat files
arotvert = list(strcmp(prefix,'a3Dvert'));  %list of 3D vertices files
%--------------------------------------------------------------------------
%subfunction to pull the vertices file
function [filename] = pullfile(dir_struct,matchstr,propstr)
%Finds, in a directory listing, the first file whose name starts with
%propstr and whose middle part equals matchstr.
%Input:     dir_struct = structure array with fields name and isdir, as
%                        returned by dir
%           matchstr   = the text that must sit between the first delimiter
%                        and the trailing numeric words/extension
%           propstr    = the word that must sit in front of the first
%                        delimiter, e.g. 'avert'
%Output:    filename   = name of the first matching file, or '' when no
%                        file matches
%A delimiter is any punctuation other than '&', or any white space.  The
%name is read from the back: the last word (between the last two
%delimiters) and every word before it is dropped for as long as str2num can
%turn it into a number; what remains after the first delimiter is compared
%with matchstr.
%NOTE(review): str2num evaluates its argument, so a word of a file name
%that happens to be a valid expression (e.g. 'pi') counts as a number, and
%file names are effectively executed.  Consider str2double if no caller
%relies on that.

filename = '';  %stays empty when nothing matches
idx = [dir_struct.isdir];   %grab all of the isdir numbers
names = {dir_struct.name};   %grab the all of the names in the root
filenames = names(~idx);
for i = 1:size(filenames,2)      %step through each filename and pull the wanted word
    filename_tmp = filenames{i};
    %punctuation except '&', plus white space
    strmask = (isstrprop(filename_tmp,'punct') & filename_tmp~='&') | isstrprop(filename_tmp,'wspace');
    y = find(strmask);  %get the positions
    %the parsing below needs the last two delimiters, a name with fewer
    %cannot be a <propstr><delim><matchstr><delim>... file
    if numel(y) < 2
        continue
    end
    %we only care about the propstr files
    if strcmp(propstr,filename_tmp(1,1:y(1)-1))
        a = 1;  %initiate
        b = 0;
        tmp = filename_tmp(1,y(end-a)+1:y(end-b)-1);    %the word between the last two delimiters
        while ~isempty(str2num(tmp))   %not empty = a number, move one word toward the front and go
            a = a+1;
            b = b+1;
            if numel(y)-a==0    %ran out of delimiters
                break
            end
            tmp = filename_tmp(1,y(end-a)+1:y(end-b)-1);
        end
        %now we want to find the filename we are looking for
        if strcmp(matchstr,filename_tmp(1,y(1)+1:y(end-b)-1))
            filename = filename_tmp;
            break   %done
        end
    end
end

%--------------------------------------------------------------------------
function [] = process_overlap(dir2,dir_info,idx,ord,dirsav)
%Applies one overlap selection to every data file of a directory and saves
%the selected data, plus summary files, to dirsav.
%Input:     dir2     = the directory the data files are read from
%           dir_info = the dir listing of dir2
%           idx      = row indexes to keep in the properties, vertices and
%                      rotated termini files
%           ord      = ordinals to keep in the flat and 3D vertices files;
%                      rows are kept when their last column equals one of
%                      the values in the first column of ord
%           dirsav   = the directory everything is saved to
%Output:    none
%All files are read with dlmread skipping one header row, and written
%through sav2csv (not defined in this file).
%Note: the count summaries label the vertices counts with the names taken
%from the properties files, so they are only written when there is exactly
%one label per count.

[aprop,avert,aflat,arotvert] = process_files(dir_info);     %now get the files we want to process

%initiate
acount = [];
aucount = [];
celllabels = {};

%ordinals to keep, as a column in the class of the data they are compared to
if isempty(ord)
    ord_keys = single(zeros(0,1));
else
    ord_keys = single(ord(:,1));
end

%go through the files apply the index and save to new directory
for i = 1:size(aprop,2) %process the property files
    prop_tmp = single(dlmread([dir2,filesep,aprop{i}],',',1,0));    %open the file
    prop_tmp = prop_tmp(idx,:);     %the few, the proud, the selected
    sav2csv(prop_tmp,[aprop{i}(1:end-4),'_',num2str(size(prop_tmp,1))],dirsav);    %save out
    %now lets output the prop summary files: Note: we are assuming that
    %column two is the volume properties
    out = median(prop_tmp(:,2))+std(prop_tmp(:,2))*3;     %3 times the standard deviation should do it.
    prop_cache = prop_tmp;   %we want the saved prop to match the vert output, so work on a copy
    prop_cache(prop_tmp(:,2)>out,:) = [];           %remove the outliers
    prop_cache(isnan(prop_cache(:,1)),:) = [];      %remove not a numbers
    %summary rows: mean, standard deviation, count, median of every column.
    %std takes the weight as its second and the dimension as its third
    %argument; std(x,1) would pick the N normalization and, for a single
    %row, work across the columns instead of down them.  The table is
    %rebuilt for every file so nothing carries over from the previous one.
    sdata = [mean(prop_cache,1); ...
             std(prop_cache,0,1); ...
             repmat(size(prop_cache,1),1,size(prop_cache,2)); ...
             median(prop_cache,1)];
    %label used for the summary file and the count tables
    celllabels{i} = process_overlap_label(aprop{i});
    sav2csv(sdata,['asum_',celllabels{i}],dirsav);     %save the properties summary
end
for j = 1:size(avert,2) %process the vertices files
    vert_tmp = single(dlmread([dir2,filesep,avert{j}],',',1,0));    %open the file
    vert_tmp = vert_tmp(idx,:);     %the few, the proud, the selected
    sav2csv(vert_tmp,[avert{j}(1:end-4),'_',num2str(size(vert_tmp,1))],dirsav);    %save out
    %lets generate the count statistic files
    acount = [acount size(vert_tmp,1)];    %number of selected rows
    aucount = [aucount size(unique(vert_tmp,'rows'),1)];   %number of distinct selected rows
end
%now process the rotations
for k = 1:size(aflat,2) %process the flat files
    try %just in case of empty files
        flat_tmp = single(dlmread([dir2,filesep,aflat{k}],',',1,0));    %open the file
        flat_tmp = flat_tmp(ismember(flat_tmp(:,end),ord_keys),:);     %the few, the proud, the selected
        sav2csv(flat_tmp,aflat{k}(1:end-4),dirsav);    %save out
    catch
        warning([dirsav,filesep,'aflat']);
    end
end
for l = 1:size(arotvert,2) %process the 3D vertices files
    try     %ditto on top, not elegant...I know
        rotvert_tmp = single(dlmread([dir2,filesep,arotvert{l}],',',1,0));    %open the file
        rotvert_tmp = rotvert_tmp(ismember(rotvert_tmp(:,end),ord_keys),:);     %the few, the proud, the selected
        sav2csv(rotvert_tmp,arotvert{l}(1:end-4),dirsav);    %save out
    catch
        warning([dirsav,filesep,'3D']);
    end
end
%now do the rotated termini
try     %this is a hack, in the python version we got to streamline the rotation aspect of this.
    rotterm_tmp = single(dlmread([dir2,filesep,'arotated_termini.csv'],',',1,0));    %open
    rotterm = rotterm_tmp(idx,:);   %select
    sav2csv(rotterm,'arotated_termini.csv',dirsav);    %save
catch
    warning([dirsav,filesep,'arotated_termini.csv']);
end     %if didn't work, means the rotated termini was not the same size, just skip it, because not important at this point.

%save out summary data files
%count data
if ~isempty(acount) && numel(celllabels)==numel(acount)
    data_tmp = dataset(acount','ObsNames',celllabels','VarNames',{'count'});
    sav2csv(data_tmp,'allcount_summary.csv',dirsav);
    data_tmp = dataset(aucount','ObsNames',celllabels','VarNames',{'count'});
    sav2csv(data_tmp,'alluniquecount_summary.csv',dirsav);
else
    warning(['The count summaries were not saved to ',dirsav,': the number of properties files and vertices files does not match, or there are none.']);
end

%--------------------------------------------------------------------------
%subfunction to build the label of one properties file
function [label] = process_overlap_label(fname)
%Returns the part of fname that is left after dropping the leading word(s)
%and the trailing numeric words/extension.
%A delimiter is any punctuation other than '&', or any white space.
%Assumes fname holds at least two delimiters, as in aprop_<label>_<n>.csv.
%NOTE(review): str2num evaluates its argument, so a word that happens to be
%a valid expression counts as a number.
strmask = (isstrprop(fname,'punct') & fname~='&') | isstrprop(fname,'wspace');
y = find(strmask);  %get the positions
%walk in from the back over the numeric words
a = 1;  %initiate
b = 0;
tmp = fname(y(end-a)+1:y(end-b)-1);    %the word between the last two delimiters
while ~isempty(str2num(tmp))   %not empty = a number, move one word toward the front and go
    a = a+1;
    b = b+1;
    if numel(y)-a==0    %ran out of delimiters
        break
    end
    tmp = fname(y(end-a)+1:y(end-b)-1);
end
%now do the same for the front
a = 0;
tmp = fname(1:y(1+a)-1);
while ~isempty(str2num(tmp))&&numel(y)~=a+1   %not empty = a number, stretch one delimiter further and go
    a = a+1;
    tmp = fname(1:y(1+a)-1);
end
if a==0
    label = fname(y(1)+1:y(end-b)-1);
else
    label = fname(y(a)+1:y(end-b)-1);
end